/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.ArrayList;
import java.util.List;

import mosdi.fa.Alphabet;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.GeneralizedString;
import mosdi.fa.GeneralizedStringsEditNFA;
import mosdi.fa.GeneralizedStringsHammingNFA;
import mosdi.fa.GeneralizedStringsNFA;
import mosdi.fa.NFA;
import mosdi.util.BitArray;
import mosdi.util.Iupac;
import mosdi.util.Log;
import mosdi.util.NamedSequence;
import mosdi.util.SequenceUtils;

public class MapSubcommand extends Subcommand {

	@Override
	public String usage() {
		return
		super.usage()+" [options] <reads.fasta> <reference.fasta>\n" +
		"\n" +
		"Options:\n" +
		"  -e <errors>: number of allowed errors (default: 1)\n" +
		"  -E: use edit distance (default: Hamming distance)\n" +
		"  -r <number>: number of reads to be searched for simultaneously (default:100)\n" +
		"  -D: convert NFA into DFA\n" +
		"  -m: minimize DFA";
	}

	@Override
	public String description() {
		return
		"Maps reads to a reference.";
	}

	@Override
	public String name() {
		return "map";
	}

	@Override
	public int run(String[] args) {
		parseOptions(args, 2, "Ee:r:Dm");

		// Option dependencies
		impliedOptions("m", "D");

		// Mandatory arguments
		String readFilename = getStringArgument(0);
		String refFilename = getStringArgument(1);

		// Options
		int errors = getNonNegativeIntOption("e", 1);
		boolean editDistance = getBooleanOption("E", false);
		int simulateousReads = getPositiveIntOption("r", 100);
		boolean useDFA = getBooleanOption("D", false);
		boolean minimizeDFA = getBooleanOption("m", false);

		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		Log.startTimer();
		List<NamedSequence> reads = null;
		List<NamedSequence> refs = null;
		try {
			reads = SequenceUtils.readFastaFile(readFilename, dnaAlphabet, false);
			refs = SequenceUtils.readFastaFile(refFilename, dnaAlphabet, true);
		} catch (Exception e) {
			Log.errorln(e.toString());
			System.exit(1);
		}
		Log.restartTimer("Reading FASTA files");
		int n = 0;
		while (n<reads.size()) {
			Log.startTimer();
			List<GeneralizedString> l = new ArrayList<GeneralizedString>(2*simulateousReads);
			int i=0;
			for (; i<simulateousReads; ++i) {
				if (n==reads.size()) break;
				int[] read = reads.get(n++).getSequence();
				l.addAll(Iupac.toGeneralizedStrings(dnaAlphabet.buildString(read), true));
			}
			NFA nfa = null;
			if (errors==0) {
				nfa = new GeneralizedStringsNFA(l);
			} else {
				if (editDistance) {
					nfa = new GeneralizedStringsEditNFA(l,errors);
				} else {
					nfa = new GeneralizedStringsHammingNFA(l,errors);
				}
			}
			Log.stopTimer("Constructing NFA");
			Log.printf(Log.Level.DEBUG, "Constructed NFA for %d reads\n", i);
			if (useDFA) {
				Log.startTimer();
				CDFA cdfa = DFAFactory.build(dnaAlphabet, nfa, 1000000);
				Log.restartTimer("Constructing DFA");
				Log.printf(Log.Level.DEBUG, "DFA has %d states\n", cdfa.getStateCount());
				if (minimizeDFA) {
					cdfa = cdfa.minimize(); 
					Log.restartTimer("Minimizing DFA");
					Log.printf(Log.Level.DEBUG, "Minimized DFA has %d states\n", cdfa.getStateCount());
				}
				for (NamedSequence ref : refs) {
					int activeState = cdfa.getStartState();
					int pos = 0;
					for (int c : ref.getSequence()) {
						if (c<0) {
							activeState = cdfa.getStartState();
							continue;
						}
						activeState = cdfa.getTransitionTarget(activeState, c);
						if (cdfa.getStateOutput(activeState)>0) {
							Log.printf(Log.Level.DEBUG, "Found match ending at position %d\n", pos);
						}
						pos += 1;
					}
				}

			} else {
				Log.startTimer();
				for (NamedSequence ref : refs) {
					BitArray activeStates = nfa.startStates();
					BitArray acceptStates = nfa.acceptStates();
					int pos = 0;
					for (int c : ref.getSequence()) {
						if (c<0) {
							activeStates = nfa.startStates();
							continue;
						}
						activeStates = nfa.transition(activeStates, c);
						if (!activeStates.allZeroAfterAnd(acceptStates)) {
							Log.printf(Log.Level.DEBUG, "Found match ending at position %d\n", pos);
						}
						pos += 1;
					}
				}
			}
			Log.stopTimer("Processing all sequences with current automaton");
		}
		Log.stopTimer("Mapping all reads");
		return 0;
	}

}
